{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Autoregressive Moving Average (ARMA): Sunspots data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "from scipy import stats\n",
    "from statsmodels.tsa.arima.model import ARIMA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.graphics.api import qqplot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sunspots Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(sm.datasets.sunspots.NOTE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dta = sm.datasets.sunspots.load_pandas().data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dta.index = pd.Index(sm.tsa.datetools.dates_from_range(\"1700\", \"2008\"))\n",
    "dta.index.freq = dta.index.inferred_freq\n",
    "del dta[\"YEAR\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dta.plot(figsize=(12, 8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax1 = fig.add_subplot(211)\n",
    "fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)\n",
    "ax2 = fig.add_subplot(212)\n",
    "fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma_mod20 = ARIMA(dta, order=(2, 0, 0)).fit()\n",
    "print(arma_mod20.params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma_mod30 = ARIMA(dta, order=(3, 0, 0)).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(arma_mod30.params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Does our model obey the theory?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sm.stats.durbin_watson(arma_mod30.resid.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax = fig.add_subplot(111)\n",
    "ax = arma_mod30.resid.plot(ax=ax)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "resid = arma_mod30.resid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stats.normaltest(resid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax = fig.add_subplot(111)\n",
    "fig = qqplot(resid, line=\"q\", ax=ax, fit=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax1 = fig.add_subplot(211)\n",
    "fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)\n",
    "ax2 = fig.add_subplot(212)\n",
    "fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)\n",
    "data = np.c_[np.arange(1, 25), r[1:], q, p]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table = pd.DataFrame(data, columns=[\"lag\", \"AC\", \"Q\", \"Prob(>Q)\"])\n",
    "print(table.set_index(\"lag\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* This indicates a lack of fit."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* In-sample dynamic prediction. How good does our model do?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predict_sunspots = arma_mod30.predict(\"1990\", \"2012\", dynamic=True)\n",
    "print(predict_sunspots)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mean_forecast_err(y, yhat):\n",
    "    return y.sub(yhat).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise: Can you obtain a better fit for the Sunspots model? (Hint: sm.tsa.AR has a method select_order)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Simulated ARMA(4,1): Model Identification is Difficult"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.tsa.arima_process import ArmaProcess"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(1234)\n",
    "# include zero-th lag\n",
    "arparams = np.array([1, 0.75, -0.65, -0.55, 0.9])\n",
    "maparams = np.array([1, 0.65])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's make sure this model is estimable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma_t = ArmaProcess(arparams, maparams)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma_t.isinvertible"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma_t.isstationary"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* What does this mean?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax = fig.add_subplot(111)\n",
    "ax.plot(arma_t.generate_sample(nsample=50))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arparams = np.array([1, 0.35, -0.15, 0.55, 0.1])\n",
    "maparams = np.array([1, 0.65])\n",
    "arma_t = ArmaProcess(arparams, maparams)\n",
    "arma_t.isstationary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma_rvs = arma_t.generate_sample(nsample=500, burnin=250, scale=2.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax1 = fig.add_subplot(211)\n",
    "fig = sm.graphics.tsa.plot_acf(arma_rvs, lags=40, ax=ax1)\n",
    "ax2 = fig.add_subplot(212)\n",
    "fig = sm.graphics.tsa.plot_pacf(arma_rvs, lags=40, ax=ax2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* For mixed ARMA processes the Autocorrelation function is a mixture of exponentials and damped sine waves after (q-p) lags.\n",
    "* The partial autocorrelation function is a mixture of exponentials and dampened sine waves after (p-q) lags."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lags = int(10 * np.log10(arma_rvs.shape[0]))\n",
    "arma11 = ARIMA(arma_rvs, order=(1, 0, 1)).fit()\n",
    "resid = arma11.resid\n",
    "r, q, p = sm.tsa.acf(resid, nlags=lags, fft=True, qstat=True)\n",
    "data = np.c_[range(1, lags + 1), r[1:], q, p]\n",
    "table = pd.DataFrame(data, columns=[\"lag\", \"AC\", \"Q\", \"Prob(>Q)\"])\n",
    "print(table.set_index(\"lag\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "arma41 = ARIMA(arma_rvs, order=(4, 0, 1)).fit()\n",
    "resid = arma41.resid\n",
    "r, q, p = sm.tsa.acf(resid, nlags=lags, fft=True, qstat=True)\n",
    "data = np.c_[range(1, lags + 1), r[1:], q, p]\n",
    "table = pd.DataFrame(data, columns=[\"lag\", \"AC\", \"Q\", \"Prob(>Q)\"])\n",
    "print(table.set_index(\"lag\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise: How good of in-sample prediction can you do for another series, say, CPI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "macrodta = sm.datasets.macrodata.load_pandas().data\n",
    "macrodta.index = pd.Index(sm.tsa.datetools.dates_from_range(\"1959Q1\", \"2009Q3\"))\n",
    "cpi = macrodta[\"cpi\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Hint: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = plt.figure(figsize=(12, 8))\n",
    "ax = fig.add_subplot(111)\n",
    "ax = cpi.plot(ax=ax)\n",
    "ax.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "P-value of the unit-root test, resoundingly rejects the null of a unit-root."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(sm.tsa.adfuller(cpi)[1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
